library(readr)
Untreated <- readRDS(paste0(wd,"/data/NCI_TPW_gep_untreated.rds"))
Treated <- readRDS(paste0(wd,"/data/NCI_TPW_gep_treated.rds"))
Metadata = read.table(paste0(wd,"/data/NCI_TPW_metadata.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)
Cellline_annotation = read.table(paste0(wd,"/data/cellline_annotation.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)
Drug_annotation = read.table(paste0(wd,"/data/drug_annotation.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)
Treated <- as.data.frame(Treated)
Untreated <- as.data.frame(Untreated)
#Find cell lines, which belong to vorinostat:
#Untreated matrix:
UntreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Untreated))
UntreatedVorinostatcolumns
## [1] 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
## [18] 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
## [35] 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
## [52] 812 813 814 815 816 817 818 819
#-> seems like column 761 - 819 belongs to vorinostat. Check if that is true:
identical(UntreatedVorinostatcolumns, 761:819)
## [1] TRUE
#-> TRUE
#Same with treated matrix:
TreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Treated))
TreatedVorinostatcolumns
## [1] 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
## [18] 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
## [35] 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
## [52] 812 813 814 815 816 817 818 819
identical(TreatedVorinostatcolumns, 761:819)
## [1] TRUE
#Define Vorinostat-data:
UntreatedVorinostat <- Untreated[,UntreatedVorinostatcolumns]
TreatedVorinostat <- Treated[,TreatedVorinostatcolumns]
#fold change matrix
FC <- TreatedVorinostat - UntreatedVorinostat
#Create a common matrix for treated and untreated vorinostat data:
VorinostatTotal <- cbind(UntreatedVorinostat, TreatedVorinostat)
# variable um Zahlen zu ersetzen
col_untreated = grep ('_0nM',colnames(VorinostatTotal))
col_untreated
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50 51 52 53 54 55 56 57 58 59
col_treated = grep ('_5000nM',colnames(VorinostatTotal))
col_treated
## [1] 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
## [18] 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## [35] 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
## [52] 111 112 113 114 115 116 117 118
library(clusterProfiler)
##
## clusterProfiler v3.10.1 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
###################################################################################################
###################################################################################################
############################## creat a list of genes/biomarkers ###################################
# pValues for biomarkers
pValues <- apply(VorinostatTotal, 1, function(x) t.test(x[col_untreated],x[col_treated],paired = TRUE, alternative = "two.sided")$p.value)
# sort the p.values
sortedpValues <- sort(pValues, decreasing = FALSE)
sortedpValues <- as.matrix(sortedpValues)
# take the first 100 p.values for biomarkers
biomarkers <- sortedpValues[1:100,]
biomarkers <- as.matrix(biomarkers)
# creat vector with genes
biomarkers.genes = row.names(biomarkers)
biomarkers.genes
## [1] "MAP1LC3B" "TOE1" "ATG101"
## [4] "PI4K2A" "MEPCE" "LANCL2"
## [7] "PMF1" "RCC1L" "ZNF277"
## [10] "ANP32B" "GABARAPL2" "SPAG9"
## [13] "SLC25A44" "PPRC1" "SCMH1"
## [16] "OSER1" "NME6" "KIAA0368"
## [19] "HDGF" "HCFC1" "U2AF2"
## [22] "UBTF" "GMEB2" "PHF2"
## [25] "SEPHS1" "MRPL9" "CERS2"
## [28] "CTCF" "COIL" "DPM1"
## [31] "ZFX" "CYB5R1" "HDAC3"
## [34] "ZMYND11" "ASB6" "ARL8B"
## [37] "AFTPH" "NSMAF" "SPAG7"
## [40] "TOX4" "TUSC2" "SLC5A6"
## [43] "DHX35" "SUMO3" "PSMB5"
## [46] "SMARCB1" "AVEN" "ARFGAP2"
## [49] "TAF5L" "LSM7" "UFC1"
## [52] "ATG4A" "RBM39" "PUF60"
## [55] "RBM15B" "GABPB1" "PTPN9"
## [58] "PATZ1" "AAMDC" "SIAH1"
## [61] "BRD1" "ATP6V1D" "TOR1A"
## [64] "MICU1" "VPS52" "CGRRF1"
## [67] "CLEC16A" "FAIM" "SENP6"
## [70] "BID" "ABHD17A" "TACO1"
## [73] "MRPL11" "RNH1" "RBX1"
## [76] "RNF114" "LRRC41" "PEF1"
## [79] "DDX54" "PPP1R14B" "PTPN1"
## [82] "ZFR" "HABP4" "SUV39H1"
## [85] "FAM57A" "RAN" "MRPS7"
## [88] "KPNB1" "TXNDC15" "SF3B3"
## [91] "CCDC28A" "MIR6884///MED24" "SNX5"
## [94] "GEMIN4" "IPO4" "TRMT1"
## [97] "CETN2" "RCN1" "INPP5K"
## [100] "ZNF394"
###################################################################################################
###################################################################################################
############################ translating amog diffrent gene ID types ##############################
# install needed libary form translation
'if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("org.Hs.eg.db")'
## [1] "if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n install.packages(\"BiocManager\")\n\nBiocManager::install(\"org.Hs.eg.db\")"
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colMeans, colSums, colnames,
## dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
## intersect, is.unsorted, lapply, lengths, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
## rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
# do the translation
translated.genes= bitr(biomarkers.genes,fromType="SYMBOL", toType="ENTREZID",OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(biomarkers.genes, fromType = "SYMBOL", toType =
## "ENTREZID", : 2% of input gene IDs are fail to map...
head(translated.genes)
## SYMBOL ENTREZID
## 1 MAP1LC3B 81631
## 2 TOE1 114034
## 3 ATG101 60673
## 4 PI4K2A 55361
## 5 MEPCE 56257
## 6 LANCL2 55915
# in which types the gene names can be translated by this libary
idType("org.Hs.eg.db")
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
## [5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
## [9] "EVIDENCEALL" "GENENAME" "GO" "GOALL"
## [13] "IPI" "MAP" "OMIM" "ONTOLOGY"
## [17] "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
## [25] "UNIGENE" "UNIPROT"
###################################################################################################
################################### Gene Ontology Classification ##################################
# install needed libary
'if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DOSE")'
## [1] "if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n install.packages(\"BiocManager\")\n\nBiocManager::install(\"DOSE\")"
library(DOSE)
## Warning: package 'DOSE' was built under R version 3.5.2
## DOSE v3.8.2 For help: https://guangchuangyu.github.io/DOSE
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
data(biomarkers)
## Warning in data(biomarkers): data set 'biomarkers' not found
gene=translated.genes$ENTREZID
head(gene)
## [1] "81631" "114034" "60673" "55361" "56257" "55915"
###################################################################################################
# Notes: BP for Biological Process, MF for Molecular Function, and CC for Cellular Component
#### do the Gene Ontology Classification
### Biological Process
ggo <- groupGO(gene= gene, OrgDb = org.Hs.eg.db, ont = "BP", level = 3, readable = FALSE)
ggo.data=as.data.frame(ggo)
head(summary(ggo.data))
## ID
## GO:0000075: 1
## GO:0000728: 1
## GO:0000734: 1
## GO:0000742: 1
## GO:0000743: 1
## GO:0000920: 1
## Description
## B cell selection : 1
## DNA clamp unloading : 1
## DNA recombinase assembly involved in gene conversion at mating-type locus: 1
## FtsZ-dependent cytokinesis : 1
## RNA folding : 1
## Sertoli cell proliferation : 1
## Count GeneRatio
## Min. : 0.000 0/98 :426
## 1st Qu.: 0.000 1/98 : 34
## Median : 0.000 2/98 : 16
## Mean : 2.636 5/98 : 12
## 3rd Qu.: 0.000 3/98 : 10
## Max. :79.000 7/98 : 8
## geneID
## :426
## 22955/7543/23598/6477/55905/5901/1069: 5
## 7543 : 5
## 11334 : 4
## 10201 : 3
## 1861 : 3
#plot (ggo.data) --> ugly plot with no usable informations
# visualization
barplot(ggo, drop=TRUE, showCategory=12)

### Cellular Component
ggo2 <- groupGO(gene= gene, OrgDb = org.Hs.eg.db, ont = "CC", level = 3, readable = FALSE)
barplot(ggo2, drop=TRUE, showCategory=12)

### Molecular Function
ggo3 <- groupGO(gene= gene, OrgDb = org.Hs.eg.db, ont = "MF", level = 3, readable = FALSE)
barplot(ggo3, drop=TRUE, showCategory=12)

########################################## enrich GO ########################################################
# only works with gene id ENSEMBL
library(org.Hs.eg.db)
# create gene data frame ans translate it
gene2 <- row.names(biomarkers)
gene.df <- bitr(gene2, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 2% of input gene IDs are fail to map...
##############################################################################################################
### Cellular Component
ego1 <- enrichGO(gene = gene.df$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego1))
## Warning in summary(ego1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description GeneRatio BgRatio
## GO:0000938 GO:0000938 GARP complex 7/105 11/21794
## GO:1990745 GO:1990745 EARP complex 7/105 11/21794
## GO:0099023 GO:0099023 tethering complex 7/105 78/21794
## GO:0055037 GO:0055037 recycling endosome 9/105 227/21794
## GO:0005802 GO:0005802 trans-Golgi network 10/105 295/21794
## GO:0032588 GO:0032588 trans-Golgi network membrane 7/105 155/21794
## pvalue p.adjust qvalue
## GO:0000938 1.597129e-14 2.220009e-12 1.882931e-12
## GO:1990745 1.597129e-14 2.220009e-12 1.882931e-12
## GO:0099023 9.819549e-08 9.099449e-06 7.717821e-06
## GO:0055037 1.555296e-06 9.158570e-05 7.767966e-05
## GO:0005802 1.647225e-06 9.158570e-05 7.767966e-05
## GO:0032588 1.020634e-05 4.728939e-04 4.010913e-04
## geneID
## GO:0000938 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:1990745 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0099023 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0055037 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000129968/ENSG00000132341
## GO:0005802 ENSG00000155252/ENSG00000149182/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000132376
## GO:0032588 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## Count
## GO:0000938 7
## GO:1990745 7
## GO:0099023 7
## GO:0055037 9
## GO:0005802 10
## GO:0032588 7
# visualization
dotplot(ego1, showCategory=3)

cnetplot(ego1)

emapplot(ego1)

##############################################################################################################
### Biological Process
ego2 <- enrichGO(gene = gene.df$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego2))
## Warning in summary(ego2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description GeneRatio
## GO:0010668 GO:0010668 ectodermal cell differentiation 7/104
## GO:0006896 GO:0006896 Golgi to vacuole transport 7/104
## GO:0007398 GO:0007398 ectoderm development 7/104
## GO:0042147 GO:0042147 retrograde transport, endosome to Golgi 9/104
## GO:0032456 GO:0032456 endocytic recycling 7/104
## GO:0048566 GO:0048566 embryonic digestive tract development 7/104
## BgRatio pvalue p.adjust qvalue
## GO:0010668 14/20505 2.344304e-13 2.944446e-10 2.591073e-10
## GO:0006896 20/20505 5.165006e-12 3.243624e-09 2.854346e-09
## GO:0007398 30/20505 1.301306e-10 5.448136e-08 4.794287e-08
## GO:0042147 97/20505 1.541801e-09 4.841256e-07 4.260240e-07
## GO:0032456 43/20505 1.951711e-09 4.902698e-07 4.314309e-07
## GO:0048566 49/20505 5.074913e-09 1.062349e-06 9.348524e-07
## geneID
## GO:0010668 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0006896 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0007398 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0042147 ENSG00000008294/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000089006
## GO:0032456 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0048566 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## Count
## GO:0010668 7
## GO:0006896 7
## GO:0007398 7
## GO:0042147 9
## GO:0032456 7
## GO:0048566 7
# visualization
dotplot(ego2, showCategory=10)

cnetplot(ego2)

emapplot(ego2)

##############################################################################################################
### Molecular Function
ego3 <- enrichGO(gene = gene.df$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego3))
## Warning in summary(ego3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID
## GO:0000149 GO:0000149
## GO:0019905 GO:0019905
## GO:0001013 GO:0001013
## GO:0001163 GO:0001163
## GO:0001164 GO:0001164
## GO:0003713 GO:0003713
## Description
## GO:0000149 SNARE binding
## GO:0019905 syntaxin binding
## GO:0001013 RNA polymerase I regulatory region DNA binding
## GO:0001163 RNA polymerase I regulatory region sequence-specific DNA binding
## GO:0001164 RNA polymerase I CORE element sequence-specific DNA binding
## GO:0003713 transcription coactivator activity
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0000149 8/106 113/19626 1.774625e-07 3.572971e-05 3.355989e-05
## GO:0019905 7/106 81/19626 2.748439e-07 3.572971e-05 3.355989e-05
## GO:0001013 3/106 10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0001163 3/106 10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0001164 3/106 10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0003713 10/106 352/19626 2.100264e-05 8.403651e-04 7.893308e-04
## geneID
## GO:0000149 ENSG00000034713/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0019905 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0001013 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0001163 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0001164 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0003713 ENSG00000160783/ENSG00000148840/ENSG00000172534/ENSG00000101216/ENSG00000197724/ENSG00000005889/ENSG00000099956/ENSG00000275837/ENSG00000135801/ENSG00000132341
## Count
## GO:0000149 8
## GO:0019905 7
## GO:0001013 3
## GO:0001163 3
## GO:0001164 3
## GO:0003713 10
# visualization
dotplot(ego3, showCategory=100)

cnetplot(ego3)

emapplot(ego3)

### creat FC data
FC <- TreatedVorinostat - UntreatedVorinostat
# work with mean of the rows because we only want to compare the genes
FC_meanrow= rowMeans(FC)
#################################################################################################################
### sort the data
# work with absolute value to find the highest values
# because we want to have the most up and down regulated genes
FC_abs= abs(FC_meanrow)
## sort the values to get the 100 largest values
sortedFC_abs <- sort(FC_abs, decreasing = TRUE)
sortedFC_abs <- as.matrix(sortedFC_abs)
# take the first 100 for biomarkers
biomarkers_FC = sortedFC_abs[1:100,]
biomarkers_FC <- as.matrix(biomarkers_FC)
# see that the last ones have very similar values
# creat vector with gene names
biomarkers_FC_genes= row.names(biomarkers_FC)
#################################################################################################################
### creat matrix with FC values (positive and negativ)
# add the abs values to FC matrix
FC_both= cbind(FC_meanrow,FC_abs)
FC_both=as.data.frame(FC_both)
# order this matrix
FC_both_sorted <- FC_both[order(FC_both$FC_abs, decreasing = TRUE),]
# FC values of biomarkers
# take the first 100 of the sorted matrix, should be the same as un biomarkers_FC
biomarkers_FC_values = FC_both_sorted[1:100,]
# remove the absolute values
biomarkers_FC_values <- subset( biomarkers_FC_values, select = -FC_abs)
biomarkers_FC_values = as.matrix(biomarkers_FC_values)
library(KEGG.db)
##
## KEGG.db contains mappings based on older data because the original
## resource was removed from the the public domain before the most
## recent update was produced. This package should now be
## considered deprecated and future versions of Bioconductor may
## not have it available. Users who want more current data are
## encouraged to look at the KEGGREST or reactome.db packages
# see for other applications of this libary:
# https://bioconductor.org/packages/release/data/annotation/manuals/KEGG.db/man/KEGG.db.pdf
###################################################################################################
# works better with biomarkers from FC
library(org.Hs.eg.db)
gene2 <- row.names(biomarkers_FC)
gene.df <- bitr(gene2, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 10% of input gene IDs are fail to map...
kk <- enrichKEGG(gene=gene.df$ENTREZID,pvalueCutoff = 0.05)
head(summary(kk))
## Warning in summary(kk): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
## ID Description GeneRatio
## hsa05202 hsa05202 Transcriptional misregulation in cancer 7/46
## hsa04215 hsa04215 Apoptosis - multiple species 3/46
## hsa05203 hsa05203 Viral carcinogenesis 6/46
## hsa04210 hsa04210 Apoptosis 5/46
## hsa05166 hsa05166 Human T-cell leukemia virus 1 infection 6/46
## BgRatio pvalue p.adjust qvalue
## hsa05202 186/7866 9.055915e-05 0.01331219 0.01162970
## hsa04215 33/7866 9.032146e-04 0.04092195 0.03574993
## hsa05203 201/7866 1.032008e-03 0.04092195 0.03574993
## hsa04210 136/7866 1.113522e-03 0.04092195 0.03574993
## hsa05166 219/7866 1.605648e-03 0.04720605 0.04123980
## geneID Count
## hsa05202 8091/4790/330/4291/4804/8900/1026 7
## hsa04215 330/637/4804 3
## hsa05203 3017/4790/8349/8365/8900/1026 6
## hsa04210 4001/4790/2353/330/637 5
## hsa05166 4790/2353/8061/8829/8900/1026 6
# visualization
barplot(kk,showCategory=12)

dotplot(kk, showCategory=12)

cnetplot(kk,categorySize="geneNum")

emapplot(kk)

# problem: only finds 5 categories
################################# Gene Ontology analysis with FC biomarkers ##########################################
# load biomarkers over FC
# matrix we will work with:
biomarkers_FC_genes= row.names(biomarkers_FC)
biomarkers_FC_values = as.matrix(biomarkers_FC_values)
# load the library
library(clusterProfiler)
#################################################################################################################
### translate the gene names
library(org.Hs.eg.db)
# do the translation
# index 2 because already done analysis with biomarkers from p.values
translated.genes2= bitr(biomarkers_FC_genes,fromType="SYMBOL", toType="ENTREZID",OrgDb = org.Hs.eg.db)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(biomarkers_FC_genes, fromType = "SYMBOL", toType =
## "ENTREZID", : 10% of input gene IDs are fail to map...
head(translated.genes2)
## SYMBOL ENTREZID
## 1 DHRS2 10202
## 2 ABAT 18
## 3 SERPINI1 5274
## 6 CLU 1191
## 7 NMI 9111
## 8 STC1 6781
#################################################################################################################
#### Gene Ontology Classification
# Notes: BP for Biological Process, MF for Molecular Function, and CC for Cellular Component
library(DOSE)
# take the needed gene ID
gene2=translated.genes2$ENTREZID
head(gene2)
## [1] "10202" "18" "5274" "1191" "9111" "6781"
### do classification
## 1. Biological Process
ggo2.1 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db, ont = "BP", level = 3, readable = FALSE)
head(summary(ggo2.1))
## Warning in summary(ggo2.1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description Count
## GO:0019953 GO:0019953 sexual reproduction 4
## GO:0019954 GO:0019954 asexual reproduction 0
## GO:0022414 GO:0022414 reproductive process 8
## GO:0032504 GO:0032504 multicellular organism reproduction 4
## GO:0032505 GO:0032505 reproduction of a single-celled organism 0
## GO:0061887 GO:0061887 reproduction of symbiont in host 0
## GeneRatio geneID
## GO:0019953 4/90 18/8061/330/8900
## GO:0019954 0/90
## GO:0022414 8/90 18/6781/374/2353/10370/8061/330/8900
## GO:0032504 4/90 6781/8061/330/8900
## GO:0032505 0/90
## GO:0061887 0/90
# visualization
barplot(ggo2.1, drop=TRUE, showCategory=12)

## 2. Molecular Function
ggo2.2 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db, ont = "MF", level = 3, readable = FALSE)
head(summary(ggo2.2))
## Warning in summary(ggo2.2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID
## GO:0001070 GO:0001070
## GO:0001072 GO:0001072
## GO:0001073 GO:0001073
## GO:0001134 GO:0001134
## GO:0003700 GO:0003700
## GO:0003711 GO:0003711
## Description
## GO:0001070 RNA-binding transcription regulator activity
## GO:0001072 transcription antitermination factor activity, RNA binding
## GO:0001073 transcription antitermination factor activity, DNA binding
## GO:0001134 transcription regulator recruiting activity
## GO:0003700 DNA-binding transcription factor activity
## GO:0003711 transcription elongation regulator activity
## Count GeneRatio geneID
## GO:0001070 0 0/90
## GO:0001072 0 0/90
## GO:0001073 0 0/90
## GO:0001134 0 0/90
## GO:0003700 9 9/90 8091/4790/2353/10370/8061/2305/2969/23635/467
## GO:0003711 0 0/90
# visualization
barplot(ggo2.2, drop=TRUE, showCategory=12)

## 3. Cellular Component
ggo2.3 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db, ont = "CC", level = 3, readable = FALSE)
head(summary(ggo2.3))
## Warning in summary(ggo2.3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description Count GeneRatio
## GO:0005886 GO:0005886 plasma membrane 25 25/90
## GO:0005628 GO:0005628 prospore membrane 0 0/90
## GO:0005789 GO:0005789 endoplasmic reticulum membrane 4 4/90
## GO:0019867 GO:0019867 outer membrane 1 1/90
## GO:0031090 GO:0031090 organelle membrane 17 17/90
## GO:0034357 GO:0034357 photosynthetic membrane 0 0/90
## geneID
## GO:0005886 6781/57030/8744/55256/4758/79850/360/5168/23208/7205/11151/80328/8061/4900/1490/10486/8829/55915/6515/1368/27340/6253/27131/4804/306
## GO:0005628
## GO:0005789 374/9526/6253/2281
## GO:0019867 637
## GO:0031090 1191/374/7298/57030/4758/4001/23208/51131/11151/4900/57134/6515/64921/27131/637/2281/306
## GO:0034357
# visualization
barplot(ggo2.3, drop=TRUE, showCategory=12)

#################################################################################################################
### enrich GO
# only works with gene id ENSEMBL
library(org.Hs.eg.db)
# create gene data frame ans translate it
gene2.2 <- row.names(biomarkers_FC_values)
gene.df2 <- bitr(gene2.2, fromType = "SYMBOL",
toType = c("ENSEMBL", "ENTREZID"),
OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2.2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 10% of input gene IDs are fail to map...
##############################################################################################################
### Cellular Component
ego2.1 <- enrichGO(gene = gene.df2$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego2.1))
## Warning in summary(ego2.1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description GeneRatio BgRatio
## GO:0035580 GO:0035580 specific granule lumen 9/98 83/21794
## GO:0042581 GO:0042581 specific granule 11/98 213/21794
## GO:0043202 GO:0043202 lysosomal lumen 8/98 111/21794
## GO:0034774 GO:0034774 secretory granule lumen 12/98 374/21794
## GO:0060205 GO:0060205 cytoplasmic vesicle lumen 12/98 392/21794
## GO:0031983 GO:0031983 vesicle lumen 12/98 393/21794
## pvalue p.adjust qvalue
## GO:0035580 1.290481e-10 2.864867e-08 2.445121e-08
## GO:0042581 3.196320e-09 3.547915e-07 3.028092e-07
## GO:0043202 3.773459e-08 2.792360e-06 2.383237e-06
## GO:0034774 1.180312e-07 6.550730e-06 5.590950e-06
## GO:0060205 1.958048e-07 7.445533e-06 6.354652e-06
## GO:0031983 2.012306e-07 7.445533e-06 6.354652e-06
## geneID
## GO:0035580 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0042581 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320/ENSG00000059804/ENSG00000138772
## GO:0043202 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0034774 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0060205 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0031983 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## Count
## GO:0035580 9
## GO:0042581 11
## GO:0043202 8
## GO:0034774 12
## GO:0060205 12
## GO:0031983 12
# visualization
dotplot(ego2.1, showCategory=3)

cnetplot(ego2.1)

emapplot(ego2.1)

##############################################################################################################
### Biological Process
ego2.2 <- enrichGO(gene = gene.df2$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego2.2))
## Warning in summary(ego2.2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description GeneRatio
## GO:0006689 GO:0006689 ganglioside catabolic process 8/95
## GO:0009313 GO:0009313 oligosaccharide catabolic process 8/95
## GO:0046479 GO:0046479 glycosphingolipid catabolic process 8/95
## GO:0019377 GO:0019377 glycolipid catabolic process 8/95
## GO:0046514 GO:0046514 ceramide catabolic process 8/95
## GO:0001573 GO:0001573 ganglioside metabolic process 8/95
## BgRatio pvalue p.adjust qvalue
## GO:0006689 14/20505 4.609982e-16 9.385924e-13 8.477514e-13
## GO:0009313 22/20505 4.762652e-14 3.232253e-11 2.919422e-11
## GO:0046479 22/20505 4.762652e-14 3.232253e-11 2.919422e-11
## GO:0019377 24/20505 1.087160e-13 5.533646e-11 4.998076e-11
## GO:0046514 25/20505 1.592733e-13 6.485610e-11 5.857906e-11
## GO:0001573 30/20505 8.457689e-13 2.869976e-10 2.592208e-10
## geneID
## GO:0006689 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0009313 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0046479 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0019377 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0046514 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0001573 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## Count
## GO:0006689 8
## GO:0009313 8
## GO:0046479 8
## GO:0019377 8
## GO:0046514 8
## GO:0001573 8
# visualization
dotplot(ego2.2, showCategory=10)

cnetplot(ego2.2)

emapplot(ego2.2)

##############################################################################################################
### Molecular Function
ego2.3 <- enrichGO(gene = gene.df2$ENSEMBL,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
head(summary(ego2.3))
## Warning in summary(ego2.3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
## ID Description
## GO:0052794 GO:0052794 exo-alpha-(2->3)-sialidase activity
## GO:0052795 GO:0052795 exo-alpha-(2->6)-sialidase activity
## GO:0052796 GO:0052796 exo-alpha-(2->8)-sialidase activity
## GO:0004308 GO:0004308 exo-alpha-sialidase activity
## GO:0016997 GO:0016997 alpha-sialidase activity
## GO:0004553 GO:0004553 hydrolase activity, hydrolyzing O-glycosyl compounds
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0052794 8/96 12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0052795 8/96 12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0052796 8/96 12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0004308 8/96 14/19626 7.131444e-16 4.435758e-14 4.083690e-14
## GO:0016997 8/96 14/19626 7.131444e-16 4.435758e-14 4.083690e-14
## GO:0004553 9/96 105/19626 2.233203e-09 1.157544e-07 1.065669e-07
## geneID
## GO:0052794 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0052795 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0052796 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0004308 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0016997 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0004553 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000117643
## Count
## GO:0052794 8
## GO:0052795 8
## GO:0052796 8
## GO:0004308 8
## GO:0016997 8
## GO:0004553 9
# visualization
dotplot(ego2.3, showCategory=10)

cnetplot(ego2.3)

emapplot(ego2.3)
